package com.g2room.driving.grap;

import java.io.File;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;

import org.eclipse.jetty.util.StringUtil;
import org.jsoup.Connection.Response;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

import com.g2room.driving.util.DbUtil;
import com.g2room.driving.util.IpUtil;
import com.gargoylesoftware.htmlunit.WebClient;

/**
 * Static helpers for fetching and parsing web pages (via jsoup and HtmlUnit)
 * and for reading previously stored article titles from the database.
 *
 * <p>Most fetch methods attach client-IP style headers ({@code X-Real-IP},
 * {@code X-Forwarded-For}, ...) carrying the value returned by
 * {@code IpUtil.getIp()} and a {@code User-Agent} picked at random from
 * {@link #uas}.
 */
public class GenericGrap {

	/** Pool of browser User-Agent strings; one is picked at random per request. */
	public static String[] uas = new String[]{
		"Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) Gecko/20100101 Firefox/49.0",
		"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 OPR/26.0.1656.60",
		"Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50",
		"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0",
		"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2 ",
		"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36",
		"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11",
		"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16",
		"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36",
		"Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko",
		"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400",
		"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E) ",
		"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0",
		"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; SE 2.X MetaSr 1.0) ",
		"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.4.3.4000 Chrome/30.0.1599.101 Safari/537.36",
		"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36"
	};

	/**
	 * Session id appended to the Cookie header in {@link #getPostDocByIp(String)}.
	 * NOTE(review): nothing in this class ever assigns it, so it is always the
	 * empty string here.
	 */
	private static String JSESSIONID = "";

	/** Shared generator used to pick a User-Agent; {@link Random} is thread-safe. */
	private static final Random RANDOM = new Random();

	/** Pre-compiled pattern for {@link #getUnUnicStr(String)} so it is not rebuilt on every call. */
	private static final java.util.regex.Pattern EMOJI_PATTERN = java.util.regex.Pattern.compile(
			"[\\ud83c\\udc00-\\ud83c\\udfff]|[\\ud83d\\udc00-\\ud83d\\udfff]|[\\u2600-\\u27ff]");

	/**
	 * Picks one entry of {@link #uas} uniformly at random.
	 *
	 * <p>The bound is {@code uas.length} (exclusive), so every entry, including
	 * the last one, can be selected.
	 *
	 * @return a User-Agent string from {@link #uas}
	 * @throws IllegalArgumentException if {@link #uas} is empty
	 */
	private static String randomUserAgent() {
		return uas[RANDOM.nextInt(uas.length)];
	}

	/**
	 * Creates an HtmlUnit {@link WebClient} with client-IP headers, a fixed
	 * Firefox User-Agent, cookies disabled, JavaScript and CSS processing
	 * disabled, lenient error handling and a 10 second timeout.
	 *
	 * @return a newly configured web client; the caller owns it
	 */
	public static WebClient getWebClient() {
		WebClient wc = new WebClient();
		String ip = IpUtil.getIp();
		wc.addRequestHeader("X-Real-IP", ip);
		wc.addRequestHeader("x-forwarded-for", ip);
		wc.addRequestHeader("WL-Proxy-Client-IP", ip);
		// Fixed User-Agent sent with every request made by this client.
		wc.addRequestHeader("User-Agent", "Mozilla/5.0 (Windows NT 5.1; rv:6.0.2) Gecko/20100101 Firefox/6.0.2");
		// Cookie handling is switched off.
		wc.getCookieManager().setCookiesEnabled(false);
		// Do not fail on HTTP error status codes or on script errors.
		wc.getOptions().setThrowExceptionOnFailingStatusCode(false);
		wc.getOptions().setThrowExceptionOnScriptError(false);
		wc.getOptions().setTimeout(10000);
		// JavaScript and CSS processing are switched off (the original author
		// noted that HtmlUnit's support for them is poor).
		wc.getOptions().setJavaScriptEnabled(false);
		wc.getOptions().setCssEnabled(false);
		return wc;
	}

	/**
	 * Fetches and parses {@code url}, decoding the response as UTF-8.
	 *
	 * @param url the address to fetch
	 * @return the parsed document, or {@code null} if an I/O error occurred
	 */
	public static Document getDoc(String url) {
		return getDoc(url, "utf-8");
	}

	/**
	 * Fetches {@code url} with a plain {@link URL#openStream()} and parses the
	 * body with jsoup using the given charset.
	 *
	 * @param url  the address to fetch; also used as the document base URI
	 * @param code charset name used to decode the response
	 * @return the parsed document, or {@code null} if an I/O error occurred
	 *         (the stack trace is printed)
	 */
	public static Document getDoc(String url, String code) {
		Document doc = null;
		java.io.InputStream in = null;
		try {
			in = new URL(url).openStream();
			doc = Jsoup.parse(in, code, url);
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			// Always release the network stream, even when parsing fails.
			if (in != null) {
				try {
					in.close();
				} catch (IOException ignored) {
					// Nothing useful can be done if close fails.
				}
			}
		}
		return doc;
	}

	/**
	 * Fetches {@code url} with an HTTP GET through jsoup, sending client-IP
	 * headers, a random User-Agent and a fixed mycq.qq.com cookie.
	 *
	 * <p>The URL is trimmed and every space and literal {@code %20} is removed
	 * before the request is made.
	 *
	 * @param url the address to fetch
	 * @return the parsed document, or {@code null} if {@code url} is blank
	 * @throws IOException if the request fails
	 */
	public static Document getDocByIp(String url) throws IOException {
		String ip = IpUtil.getIp();
		if (StringUtil.isBlank(url)) {
			return null;
		}
		String cleanedUrl = url.trim().replace(" ", "").replace("%20", "");
		// NOTE(review): the Cookie value below looks like a captured login
		// session; consider moving it to external configuration.
		// NOTE(review): "q=0." in the Accept header may be a typo for "q=0.9" - confirm.
		return Jsoup.connect(cleanedUrl)
			.header("X-Real-IP", ip)
			.header("X-Forwarded-For", ip)
			.header("WL-Proxy-Client-IP", ip)
			.header("Client-Ip", ip)
			.header("User-Agent", randomUserAgent())
			.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.,*/*;q=0.8")
			.header("Accept-Language", "zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3")
			.header("Accept-Charset", "GB2312,utf-8;q=0.7,*;q=0.7")
			.header("Accept-Encoding", "gzip, deflate")
			.header("Cookie", "pgv_pvi=996200086; pgv_pvid=6087801001; ts_uid=2275525435; Debc_2132_smile=5D1; pac_uid=1_252909344; h_uid=H253611172f; pt2gguin=o0252909344; RK=ZTNL1NEXNL; Debc_2132_ulastactivity=1499007815%7C0; ptcz=fe35afba54dfbac842b729a7e68e176789b5c95dd346fb6ccb4637ac64b2eeaf; tvfe_boss_uuid=68f235e980749e8e; Debc_2132_saltkey=v66585ZZ; Debc_2132_lastvisit=1498998870; Debc_2132_lastact=1499009253%09plugin.php%09viewthread; pgv_info=ssi=s2693045544&ssid=s1330600045; Debc_2132_st_p=320572%7C1499009250%7Cf4612b57c1c0798529bc9ff24a1b79f6; Debc_2132_viewid=tid_1911232; _qpsvr_localtk=0.2971818663449539; pgv_si=s1547369472; uin=o0252909344; skey=@rGsF39bcU; ptisp=cnc; luin=o0252909344; lskey=000100009aae3d563edcc321fd51d4e42f9207189840783126973ec34558e7aff05d5acace963b5a601f06a4; p_skey=*6COgrEWyfKaYUL3p*pn8vj7BQqxUi95827ExTcQCiw_; pt4_token=WhfDYgGLbDHaqHRSbfmV2ZRl8Cjd*BzkRht9oZC1j34_; p_lskey=000400004d0c221c0bfdb49e2017af19f077826d4fe3f261ea140e0ab7938d43a0deae19a71347acfddc40f0; Debc_2132_security_cookiereport=c31dMBPRUu4F%2B%2Fmc4F9NY%2BCfNlbuBpxFxfQw8Rrhf4HnCHUKB7GH; o_cookie=252909344; ts_last=mycq.qq.com/t-1911232-1.htm; Debc_2132_auth=cf8f3UNdA4jMTQn4mznvPyApQRRxrFTCTHcWjtWfaLxhWnSk; npc=3532")
			// An earlier "Host: www.dianping.com" header used to be set and then
			// overwritten by this one; only the effective value is kept.
			.header("Host", "mycq.qq.com")
			.header("Connection", "keep-alive")
			.header("Upgrade-Insecure-Requests", "1")
			.timeout(5000) // timeout in milliseconds
			.get();
	}

	/**
	 * Fetches {@code url} with an HTTP POST through jsoup, sending client-IP
	 * headers, a random User-Agent and a fixed www.dianping.com cookie.
	 *
	 * @param url the address to post to
	 * @return the parsed response document
	 * @throws IOException if the request fails
	 */
	public static Document getPostDocByIp(String url) throws IOException {
		String ip = IpUtil.getIp();
		// NOTE(review): "q=0." in the Accept header may be a typo for "q=0.9" - confirm.
		return Jsoup.connect(url)
				.header("X-Real-IP", ip)
				.header("X-Forwarded-For", ip)
				.header("WL-Proxy-Client-IP", ip)
				.header("Client-Ip", ip)
				.header("Host", "www.dianping.com")
				.header("User-Agent", randomUserAgent())
				.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.,*/*;q=0.8")
				.header("Accept-Language", "zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3")
				.header("Accept-Charset", "GB2312,utf-8;q=0.7,*;q=0.7")
				.header("Accept-Encoding", "gzip, deflate")
				.header("Cookie", "cy=9; cye=chongqing; _hc.v=82764dfe-d0bc-6812-f84f-605ff8dd2d3d.1479457884; __utma=205923334.1885237245.1479532378.1479532378.1479552043.2; __utmz=205923334.1479532378.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); s_ViewType=10; aburl=1; JSESSIONID="+JSESSIONID)
				.header("Connection", "keep-alive")
				.header("Upgrade-Insecure-Requests", "1")
				.timeout(5000) // timeout in milliseconds
				.post();
	}

	/**
	 * Executes a jsoup request against {@code url} with AJAX-style headers for
	 * www.17wscz.com and parses the raw response body as HTML.
	 *
	 * @param url the address to fetch
	 * @return the document parsed from the response body
	 * @throws IOException if the request fails
	 */
	public static Document getDocumentByIp(String url) throws IOException {
		String ip = IpUtil.getIp();
		org.jsoup.Connection connect = Jsoup.connect(url);
		connect.header("X-Real-IP", ip)
			.header("X-Forwarded-For", ip)
			.header("WL-Proxy-Client-IP", ip)
			.header("Client-Ip", ip)
			.header("Host", "www.17wscz.com")
			.header("Referer", "http://www.17wscz.com/Farm")
			.header("User-Agent", randomUserAgent())
			.header("Accept", "text/javascript, application/javascript, application/ecmascript, application/x-ecmascript, */*; q=0.01")
			.header("Accept-Language", "zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3")
			.header("Accept-Charset", "GB2312,utf-8;q=0.7,*;q=0.7")
			.header("Accept-Encoding", "gzip, deflate")
			.header("Connection", "keep-alive")
			.header("X-Requested-With", "XMLHttpRequest")
			.header("Upgrade-Insecure-Requests", "1")
			// A dianping-style Cookie header (including JSESSIONID) used to be set
			// before this one and was overwritten by it; only the effective value
			// is kept. NOTE(review): confirm which cookie was actually intended.
			.header("Cookie", "Search_Vendor_List_ThemeName=; Search_Vendor_List_CountyId=; Search_Vendor_List_CountyName=");

		Response resp = connect.execute();
		return getDocByHtml(resp.body());
	}

	/**
	 * Parses a local HTML file as UTF-8.
	 *
	 * @param fn path of the file to parse
	 * @return the parsed document
	 * @throws IOException if the file cannot be read
	 */
	public static Document getDocByLocal(String fn) throws IOException {
		return Jsoup.parse(new File(fn), "utf-8");
	}

	/**
	 * Fetches {@code url} and returns the raw response body without parsing it,
	 * ignoring the response content type (e.g. for JSON endpoints).
	 *
	 * @param url the address to fetch
	 * @return the response body as text
	 * @throws IOException if the request fails
	 */
	public static String getJsonContent(String url) throws IOException {
		String ip = IpUtil.getIp();
		return Jsoup.connect(url)
				.timeout(60000)
				.header("X-Real-IP", ip)
				.header("x-forwarded-for", ip)
				.header("WL-Proxy-Client-IP", ip)
				.userAgent(randomUserAgent())
				.ignoreContentType(true)
				.execute()
				.body();
	}

	/**
	 * Parses an HTML string into a jsoup document.
	 *
	 * @param html the markup to parse
	 * @return the parsed document
	 */
	public static Document getDocByHtml(String html) {
		return Jsoup.parse(html);
	}

	/**
	 * Reads the {@code title} column of every row in {@code d_article_grap}.
	 *
	 * <p>Any failure is printed and the titles collected so far are returned.
	 * The connection obtained from {@code DbUtil.getConn()} is deliberately left
	 * open, as before (presumably it is shared or managed by {@code DbUtil} -
	 * confirm).
	 *
	 * @return the titles read; never {@code null}, possibly empty
	 */
	public static List<String> getDetailsData() {
		String sql = "select a.title from d_article_grap a";
		List<String> articles = new ArrayList<String>();
		try {
			Connection connection = DbUtil.getConn();
			Statement statement = connection.createStatement();
			try {
				ResultSet rs = statement.executeQuery(sql);
				try {
					while (rs.next()) {
						articles.add(rs.getString("title"));
					}
				} finally {
					rs.close();
				}
			} finally {
				// Release the statement even if the query or iteration failed.
				statement.close();
			}
		} catch (Exception e) {
			e.printStackTrace();
		}
		return articles;
	}

	/**
	 * Removes emoji and symbol characters from a string: everything matched by
	 * the two surrogate-pair ranges in the pattern plus {@code U+2600}-{@code U+27FF}.
	 *
	 * @param str the text to clean; may be {@code null}
	 * @return {@code str} with the matched characters removed, or {@code null}
	 *         if {@code str} is {@code null}
	 */
	public static String getUnUnicStr(String str) {
		if (str == null) {
			return null;
		}
		return EMOJI_PATTERN.matcher(str).replaceAll("");
	}
}
